from urllib.parse import urljoin

import scrapy
from pymongo import MongoClient

from zhongyaocai_2.items import Zhongyaocai2Item

# Connect to the MongoDB server on localhost, port 27017, with explicit
# connection-pool sizes and timeouts.
# NOTE(review): the user name and password are embedded in the URI below;
# consider loading them from configuration instead of source code.
client = MongoClient(
    'mongodb://erp:erp123@localhost:27017',
    maxPoolSize=10,
    minPoolSize=3,
    maxConnecting=30,
    socketTimeoutMS=100000,
    maxIdleTimeMS=60000,
    connectTimeoutMS=40000,
    waitQueueTimeoutMS=10000,
)

# Module-level handles: the "erp" database and the two collections that
# the rest of this module refers to by these names.
db = client['erp']
collection, collection_index = db['zhongyaocai_letter'], db['zhongyaocai_index']


class Zhongyaocai2spiderSpider(scrapy.Spider):
    """Crawl the listing pages stored in MongoDB and index the links on them.

    ``start_requests`` reads one document per listing page from the
    module-level ``collection`` and requests each page. ``parse`` extracts
    the ``/zhongyaocai`` links from every downloaded page and inserts one
    document per link into the module-level ``collection_index``.
    """

    name = "zhongyaocai2spider"
    allowed_domains = ["www.zysj.com.cn"]
    # Site root against which the stored listing-page URLs are resolved.
    base_url = "https://www.zysj.com.cn/"
    # Left empty: the pages to crawl come from MongoDB in start_requests().
    start_urls = []
    # Not read by the methods of this class; kept so existing references to
    # these attributes keep working.
    letter = ""
    parent_url = ""

    # Fixed leading/trailing parts of an entry link such as
    # "/zhongyaocai/<pinyin>/index.html".
    _HREF_PREFIX = "/zhongyaocai/"
    _HREF_SUFFIX = "/index.html"

    def start_requests(self):
        """Yield one request per listing-page document found in MongoDB.

        Documents that cannot be turned into a request are logged and
        skipped. Any other failure (for example while reading the cursor)
        is logged and ends the iteration, so the spider never crashes here.
        """
        try:
            for document in collection.find():
                request = self._build_request(document)
                if request is not None:
                    yield request
        except Exception:
            # Best-effort boundary: log with traceback instead of raising.
            self.logger.exception("app error: could not read listing pages from MongoDB")

    def _build_request(self, document):
        """Return the request for one listing-page document, or ``None``.

        ``None`` is returned (after logging) when the document has no usable
        ``url`` or carries a field that ``Zhongyaocai2Item`` rejects.
        """
        try:
            item = Zhongyaocai2Item(**document)
            url = item["url"]
            if not url:
                self.logger.warning(
                    "app error: skipping document %r with empty url", document.get("_id")
                )
                return None
            # urljoin avoids the "//" that plain concatenation produces when
            # the stored url already starts with "/", and leaves absolute
            # URLs untouched.
            request_url = urljoin(self.base_url, url)
            item["request_url"] = request_url
        except KeyError:
            # Raised for a missing "url" or a field the item does not accept.
            self.logger.exception(
                "app error: skipping malformed document %r", document.get("_id")
            )
            return None

        # Request.meta is documented as a dict, so hand over a plain copy of
        # the item's fields rather than the item object itself.
        return scrapy.Request(url=request_url, callback=self.parse, meta=dict(item))

    def parse(self, response):
        """Store every ``/zhongyaocai`` link of a listing page in MongoDB.

        For each matching anchor one document with the keys ``index_url``,
        ``name_zh``, ``name_pinyin``, ``letter`` and ``parent_url`` is
        inserted into ``collection_index``. Nothing is yielded.
        """
        # Both values are per-response, so read them once instead of per link.
        letter = response.meta.get("letter")
        parent_url = response.meta.get("request_url")

        links = response.xpath('//*[@id="list-content"]/ul/li/a[contains(@href,"/zhongyaocai")]')
        for link in links:
            index_url = link.xpath("@href").get()
            name_zh = link.xpath("text()").get()
            if not index_url or not name_zh:
                # An anchor without text cannot provide a name; skip it
                # explicitly instead of failing on name_zh[0].
                self.logger.warning(
                    "app error: skipping link %r without text on %s", index_url, response.url
                )
                continue

            letter_zh = name_zh[0]
            name_pinyin = self._pinyin_from_href(index_url)

            try:
                collection_index.insert_one({"index_url": index_url, "name_zh": name_zh,
                                             "name_pinyin": name_pinyin,
                                             "letter": letter, "parent_url": parent_url})
            except Exception:
                # One failed insert must not stop the remaining links.
                self.logger.exception("app error: could not store %s", index_url)
                continue

            self.logger.debug(
                "%s %s %s %s %s %s",
                index_url, name_zh, letter_zh, name_pinyin, letter, parent_url,
            )

    @classmethod
    def _pinyin_from_href(cls, href):
        """Return ``href`` without the "/zhongyaocai/" prefix and "/index.html" suffix.

        Each part is removed only when it is actually present, so a link
        that deviates from the expected shape is not cut at arbitrary
        positions.
        """
        if href.startswith(cls._HREF_PREFIX):
            href = href[len(cls._HREF_PREFIX):]
        if href.endswith(cls._HREF_SUFFIX):
            href = href[:-len(cls._HREF_SUFFIX)]
        return href
